

#' Compare high-confidence peptidoform hits in `dataf` with hits in `datafA`
#'
#' For both inputs a combined probability `PTM_Final_prob` is computed as
#' `Score * PTM.Score`. In `dataf`, the best-scoring row(s) per peptidoform
#' (`Peptide_mod` + "_" + `PTM.positions`) are kept and then thresholded at
#' `PTM_Final_prob >= 0.95`. These rows are left-joined to `datafA` on the
#' scan identifier, and the joined rows are classified by whether the peptide
#' sequence and the PTM positions agree between the two inputs.
#'
#' @param dataf Data frame with columns `Score`, `PTM.Score`, `Spectrum`,
#'   `Peptide`, `Peptide_mod` and `PTM.positions`. The literal text ".mzML"
#'   is stripped from `Spectrum` to build the scan identifier.
#' @param datafA Data frame with columns `Score`, `PTM.Score`, `Spectrum`,
#'   `Peptide` and `PTM.positions`. `Spectrum` is used as the scan identifier
#'   unchanged.
#' @return A list of length one containing a 6 x 1 matrix whose row names are
#'   `Scans_NMSeq_95`, `Scans_MSeq_95`, `Scans_MSeq_NMSite_95`,
#'   `Scans_MSeq_MSite_95`, `Scans_NMSeq_Alternative_95` and
#'   `Scans_NMSeq_NoAlternative_95`. The first two rows count unique scans;
#'   the remaining four count unique scan/PTM-position combinations.
CompScans_95toA <- function(dataf, datafA) {
  # Combined identification x localisation score for both inputs.
  dataf$PTM_Final_prob <- dataf$Score * dataf$PTM.Score
  datafA$PTM_Final_prob <- datafA$Score * datafA$PTM.Score

  # Scan identifiers used as the join key. ".mzML" is removed as a literal
  # string (first occurrence only); a regex here would let "." match any
  # character.
  # NOTE(review): datafA$Spectrum is taken as-is - presumably it already lacks
  # the ".mzML" suffix; confirm against the caller.
  dataf$Unique_scan <- sub(".mzML", "", dataf$Spectrum, fixed = TRUE)
  datafA$Unique_scan <- datafA$Spectrum

  dataf$Sequence <- dataf$Peptide
  datafA$Sequence <- datafA$Peptide

  dataf$Peptidoform <- paste0(dataf$Peptide_mod, "_", dataf$PTM.positions)

  # Best-scoring row(s) per peptidoform (ties are kept), then the 0.95 cut.
  # dplyr verbs are namespace-qualified throughout so that a later-attached
  # package exporting the same names (e.g. plyr::mutate, which ignores
  # groups) cannot silently change the per-group results.
  Reduceddataf_95 <- dataf %>%
    dplyr::group_by(Peptidoform) %>%
    dplyr::top_n(1, PTM_Final_prob) %>%
    dplyr::ungroup() %>%
    dplyr::filter(PTM_Final_prob >= 0.95) %>%
    dplyr::select(
      Unique_scan,
      Sequence,
      PTM.positions,
      pSTY_Score = PTM_Final_prob
    )

  ReduceddatafA <- datafA %>%
    dplyr::select(
      Unique_scan,
      SequenceSTYA = Sequence,
      PTM.positionsSTYA = PTM.positions,
      pASTY_Score = PTM_Final_prob
    )

  # Left join: every retained `dataf` row is kept; scans absent from `datafA`
  # get NA in the *STYA columns.
  dataf_Merged_95 <- merge(
    x = Reduceddataf_95,
    y = ReduceddatafA,
    by = "Unique_scan",
    all.x = TRUE
  )

  # count_M: number of distinct PTM.positions values per scan, counted over
  # all merged rows of that scan.
  dataf_Merged_95 <- dataf_Merged_95 %>%
    dplyr::group_by(Unique_scan) %>%
    dplyr::mutate(count_M = dplyr::n_distinct(PTM.positions)) %>%
    dplyr::ungroup()

  # Both flags are NA when the scan has no partner row in `datafA`
  # (comparison against NA); those rows are picked up by is.na() below.
  dataf_Merged_95$SeqMatch <- ifelse(
    dataf_Merged_95$Sequence == dataf_Merged_95$SequenceSTYA,
    1, 0
  )
  dataf_Merged_95$PTM_Match <- ifelse(
    dataf_Merged_95$Sequence == dataf_Merged_95$SequenceSTYA &
      dataf_Merged_95$PTM.positions == dataf_Merged_95$PTM.positionsSTYA,
    1, 0
  )
  dataf_Merged_95$SiteID <- paste0(
    dataf_Merged_95$Unique_scan, "_", dataf_Merged_95$PTM.positions
  )

  # Results #
  ###########

  NoMatchSeq_95 <- subset(dataf_Merged_95, SeqMatch == 0 | is.na(SeqMatch))
  MatchSeq_95 <- subset(dataf_Merged_95, SeqMatch == 1)

  Scans_NMSeq_95 <- length(unique(NoMatchSeq_95$Unique_scan))
  Scans_MSeq_95 <- length(unique(MatchSeq_95$Unique_scan))

  # Per scan: number of sequence-matched rows whose PTM positions also match.
  MatchSeq_95 <- MatchSeq_95 %>%
    dplyr::group_by(Unique_scan) %>%
    dplyr::mutate(sum_matches = sum(PTM_Match)) %>%
    dplyr::ungroup()

  # Flag = distinct PTM.positions for the scan minus position-matching rows.
  MatchSeq_95$Flag <- MatchSeq_95$count_M - MatchSeq_95$sum_matches

  MatchSeq_MSites_95 <- subset(MatchSeq_95, Flag == 0)
  # NOTE(review): only Flag == 1 is treated as "sites not matched"; rows with
  # any other non-zero Flag end up in neither subset. Confirm this is intended.
  MatchSeq_NoMSites_95 <- subset(MatchSeq_95, Flag == 1)

  Scans_MSeq_NMSite_95 <- length(unique(MatchSeq_NoMSites_95$SiteID))
  Scans_MSeq_MSite_95 <- length(unique(MatchSeq_MSites_95$SiteID))

  # Among sequence mismatches: PTM_Match == 0 means `datafA` offered a
  # different identification; NA means `datafA` had no row for the scan.
  NoMatchSeq_Alternative_95 <- subset(NoMatchSeq_95, PTM_Match == 0)
  NoMatchSeq_NoAlternative_95 <- subset(NoMatchSeq_95, is.na(PTM_Match))

  Scans_NMSeq_Alternative_95 <- length(unique(NoMatchSeq_Alternative_95$SiteID))
  Scans_NMSeq_NoAlternative_95 <- length(unique(NoMatchSeq_NoAlternative_95$SiteID))

  # rbind() on bare symbols supplies the row names, so the variable names
  # above are part of the returned value and must not be renamed.
  Results_95 <- rbind(
    Scans_NMSeq_95,
    Scans_MSeq_95,
    Scans_MSeq_NMSite_95,
    Scans_MSeq_MSite_95,
    Scans_NMSeq_Alternative_95,
    Scans_NMSeq_NoAlternative_95
  )

  list(Results_95)
}